library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
## ✔ broom        1.0.5     ✔ rsample      1.2.0
## ✔ dials        1.2.0     ✔ tune         1.1.2
## ✔ infer        1.0.5     ✔ workflows    1.1.3
## ✔ modeldata    1.2.0     ✔ workflowsets 1.0.1
## ✔ parsnip      1.1.1     ✔ yardstick    1.2.0
## ✔ recipes      1.0.9     
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Learn how to get started at https://www.tidymodels.org/start/
library(tableone)
#library(colino) 
tidymodels_prefer()
library(DALEXtra)
## Loading required package: DALEX
## Welcome to DALEX (version: 2.4.3).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## Additional features will be available after installation of: ggpubr.
## Use 'install_dependencies()' to get all suggested dependencies
## 
## Attaching package: 'DALEX'
## 
## The following object is masked from 'package:dplyr':
## 
##     explain
library(themis)
library(mrgsolve)
library(truncnorm)
library(mapbayr)
library(MESS)
library(xgboost)
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

Dvorchik model implementation

Dvorchik BH, Brazier D, DeBruin MF, Arbeit RD. 2003. Daptomycin Pharmacokinetics and Safety following Administration of Escalating Doses Once Daily to Healthy Subjects. Antimicrob Agents Chemother 47:1318–1323.

library(data.table)

## data creation
# Two-compartment daptomycin population-PK model after Dvorchik et al. 2003,
# written as an mrgsolve model string. Covariate effects (see $MAIN):
#   - CL: linear in creatinine clearance (centered at 91.2 mL/min) and body
#     temperature (centered at 37.2 C), and multiplied by (0.8 + 0.2*SEX),
#     i.e. CL for SEX == 0 is 80% of CL for SEX == 1.
#   - Q and V2: linear in body weight (centered at 75.1 kg).
# The ETA1..ETA4 $PARAM entries allow fixing individual deviations from the
# outside (e.g. for mapbayr) on top of the sampled ETA(1)..ETA(4).
# The $TABLE block re-samples the residual-error EPS draws (up to 100 times)
# until the simulated concentration DV is non-negative.
code <-
  "
$PARAM @annotated
TVCL:  0.807 : Clearance
TVV1: 4.80 : Central volume
TVV2  : 3.13 : Peripheral volume of distribution
TVQ   :  3.46 : Intercompartmental clearance
CRCL: 0.00514 : effect of CLCR on CL
TCL: 0.14 : effect of body temperature on CL
WTQ: 0.0593 : linear effect of WT deviation on Q
WTV2: 0.0458 : linear effect of WT deviation on V2

ETA1: 0 : Clearance (L/h)
ETA2: 0 : central volume (L)
ETA3: 0 : intercompartmental Clearance (L/h)
ETA4: 0 : peripheral volume (L)


$PARAM @annotated @covariates
CREATCL : 91.2 : estimated creat clearance ml/min
WT: 75.1 : Body weight
SEX : 1 : men (1) women(0)
TEMP : 37.2 : temperature

$OMEGA 0.093636 0.3249 0.425104 0.036481

$SIGMA
0.001 // proportional
0.001 // additive 

$CMT @annotated
CENT  : Central compartment (mg/L)[ADM, OBS]
PERIPH: Peripheral compartment ()

$TABLE
capture DV = (CENT/V1) *(1 + EPS(1)) + EPS(2);
int i = 0;
while(DV<0 && i <100) {
simeps();

DV = (CENT/V1) *(1 + EPS(1)) + EPS(2);
++i;
}

$MAIN
double CL = (((TVCL+ CRCL*(CREATCL - 91.2))+TCL*(TEMP-37.2))*(0.8 + 0.2*SEX)) * exp(ETA1 + ETA(1));
double V1 = TVV1  * exp(ETA2 + ETA(2)) ;
double Q = (TVQ + WTQ * (WT - 75.1)) * exp(ETA3 + ETA(3)) ;
double V2 = (TVV2 + WTV2 * (WT - 75.1)) * exp(ETA4 + ETA(4)) ;

$ODE
dxdt_CENT   =  -(CL+Q)*CENT/V1 + Q*PERIPH/V2 ;
dxdt_PERIPH =  Q*CENT/V1 - Q*PERIPH/V2 ;

$CAPTURE DV CL V2 V1 Q
"

# Compile the model (mrgsolve translates the spec to C++ and builds it)
my_model <- mcode("Example_model", code)
## Building Example_model ... done.

Patient simulation

# Number of simulated patients in the main cohort.
# NOTE(review): this global `n` is never read afterwards — n() inside mutate()
# is dplyr::n(), not this variable.
n= 4500

# Variates are weight, temperature, renal function, sex, dose/amount 

# NOTE(review): the SAME seed ("121") is re-set before every covariate draw
# below, so WT, TEMP, CREATCL and SEX are all generated from an identical RNG
# stream and are therefore strongly dependent (rank-correlated), not
# independent — confirm this is intended. Also str_c() yields a character
# seed; prefer an integer, e.g. set.seed(as.integer(str_c(12, i))).
i=1
set.seed(str_c(12,i))

# Body weight (kg), truncated normal on [48, 153]
WT_data = tibble(ID = 1:4500) %>% 
  mutate(WT = rtruncnorm(n(),a=48, b=153, mean=75.1, sd=30))

set.seed(str_c(12,i))
# Body temperature (C), truncated normal on [36.1, 40.1]
TEMP_data = tibble(ID = 1:4500) %>% 
  mutate(TEMP = rtruncnorm(n(),a=36.1, b=40.1, mean=37.2, sd=1.5))

set.seed(str_c(12,i))
# Creatinine clearance (mL/min), truncated normal on [14, 150]
CREATCL_data = tibble(ID = 1:4500) %>% 
  mutate(CREATCL = rtruncnorm(n(),a=14, b=150, mean=91.2, sd=30))

set.seed(str_c(12,i))
# Sex: 1 = men (59%), 0 = women
SEX_data = tibble(ID = 1:4500) %>% 
  mutate(SEX = rbinom(n(),1,0.59))

# Dosing events for the main cohort: nine weight-based dose groups
# (4-12 mg/kg), 500 patients each, q24h 0.5-h infusions with 5 additional
# doses. The nine copy-pasted ev()/bind_rows() blocks are replaced by a single
# map2() over (dose, first ID); the result is identical row-for-row after
# arrange(ID). Covariates are joined by ID (explicit key silences the join
# messages) and the per-kg dose is converted to an absolute amount (mg) with
# the matching infusion rate.
dose_groups <- 4:12
first_ids   <- seq(1, by = 500, length.out = length(dose_groups))

data_ev <- map2(dose_groups, first_ids, function(dose_mgkg, id_start) {
  as_tibble(ev(ID = id_start:(id_start + 499), amt = dose_mgkg,
               ii = 24, addl = 5, tinf = 0.5)) %>%
    mutate(dose_group = str_c(dose_mgkg, "mg/kg"))
}) %>%
  bind_rows() %>%
  arrange(ID) %>%
  left_join(WT_data, by = "ID") %>%
  left_join(CREATCL_data, by = "ID") %>%
  left_join(SEX_data, by = "ID") %>%
  left_join(TEMP_data, by = "ID") %>%
  mutate(amt = WT*amt, rate = amt/tinf)
# Simulate concentration-time profiles for the 4500 patients on a 0.5-h grid
# up to 97 h, returning only DV (concentration) and CL from the captures
out <- my_model %>%
  data_set(data_ev) %>%
  Req(DV, CL) %>%
  mrgsim(end = 97, delta = 0.5)

#Creation data simulation
# Attach each patient's covariates (and dose group) to the simulated output
covariate_cols <- data_ev %>% select(ID, dose_group, WT, SEX, CREATCL, TEMP)
simul_dapto <- out %>%
  as_tibble() %>%
  left_join(covariate_cols, by = "ID")

# simul_dapto %>% filter(ID %in% c(1:10)) %>% ggplot(aes(x = time, y = DV)) + geom_point()
# 450 extreme profiles: high body weight (100-153 kg) combined with renal
# impairment (CrCl 14-60 mL/min), to enrich the dataset at the edge of the
# covariate space.
# NOTE(review): as for the main cohort, the same seed ("121") is re-set before
# every draw, so these covariates share one RNG stream and are not mutually
# independent — confirm this is intended.
set.seed(str_c(12,i))
WT_data_ex = tibble(ID = 4501:4950) %>% 
  mutate(WT = rtruncnorm(n(),a=100, b=153, mean=120, sd=30))

set.seed(str_c(12,i))
CREATCL_data_ex = tibble(ID = 4501:4950) %>% 
  mutate(CREATCL = rtruncnorm(n(),a=14, b=60, mean=30, sd=15))

set.seed(str_c(12,i))
SEX_data_ex = tibble(ID = 4501:4950) %>% 
  mutate(SEX = rbinom(n(),1,0.59))

set.seed(str_c(12,i))
TEMP_data_ex = tibble(ID = 4501:4950) %>%  
  mutate(TEMP = rtruncnorm(n(),a=36.1, b=40.1, mean=37.2, sd=1.5))

# Dosing events for the 450 extreme patients: nine dose groups (4-12 mg/kg),
# 50 patients each, same q24h/0.5-h-infusion schedule as the main cohort.
# Deduplicated the nine copy-pasted ev() blocks with map2(); identical result.
dose_groups_ex <- 4:12
first_ids_ex   <- seq(4501, by = 50, length.out = length(dose_groups_ex))

data_ev_ex <- map2(dose_groups_ex, first_ids_ex, function(dose_mgkg, id_start) {
  as_tibble(ev(ID = id_start:(id_start + 49), amt = dose_mgkg,
               ii = 24, addl = 5, tinf = 0.5)) %>%
    mutate(dose_group = str_c(dose_mgkg, "mg/kg"))
}) %>%
  bind_rows() %>%
  arrange(ID) %>%
  left_join(WT_data_ex, by = "ID") %>%
  left_join(CREATCL_data_ex, by = "ID") %>%
  left_join(SEX_data_ex, by = "ID") %>%
  left_join(TEMP_data_ex, by = "ID") %>%
  mutate(amt = WT*amt, rate = amt/tinf)
# Simulate the 450 extreme patients on the same time grid
out_ex <- my_model %>%
  data_set(data_ev_ex) %>%
  Req(DV, CL) %>%
  mrgsim(end = 97, delta = 0.5)

# Extreme-profile simulations joined with their covariates
simul_dapto_ex <- out_ex %>%
  as_tibble() %>%
  left_join(select(data_ev_ex, ID, dose_group, WT, SEX, CREATCL, TEMP),
            by = "ID")
#Join both files + AUC estimation and sex effect
# Reference ("true") AUC over a dosing interval at steady state: dose / CL,
# with dose (mg) recovered from the dose-group label times body weight.
# NOTE(review): "datpo" looks like a typo for "dapto"; the name is kept
# because all downstream code refers to it.
simul_datpo_all <- simul_dapto %>% 
  bind_rows(simul_dapto_ex) %>%
  mutate(dose = parse_number(dose_group)*WT,
         auc = dose/CL)

# Overlay of all simulated concentration-time profiles (visual sanity check)
simul_datpo_all %>% 
  ggplot(aes(x = time, y = DV )) + 
  geom_point(alpha = 0.2) +
  theme_bw()

# Keep only the two sampling times used as ML predictors: 95.5 h (pre-dose
# trough before the 96-h dose, given ii = 24 / addl = 5) and 97 h (1 h after
# the start of that infusion). `dose` and `auc` already exist on
# simul_datpo_all with exactly the same formulas, so the redundant mutate()
# recomputation has been removed (behavior unchanged). Exact float comparison
# is safe here because time lies on the delta = 0.5 simulation grid.
simul_datpo_all3 <- simul_datpo_all %>% filter(time==95.5 | time==97)
        # alternative, sex-corrected AUC (disabled):
        # auc = if_else(SEX==1, dose/CL, (dose/(CL*0.8)))

# One row per patient: spread the two retained times into conc_95.5 / conc_97
# columns, then re-attach them to the single (time == 95.5) patient record.
pivot_conc <- simul_datpo_all3 %>% 
  select(ID, time, DV) %>% 
  pivot_wider(id_cols = ID, names_from = time, names_prefix = "conc_", values_from = DV)

base_prediction_auc <- simul_datpo_all3 %>% 
  filter(time==95.5) %>% 
  select( -time, -DV) %>%
  # explicit key instead of the implicit natural join (silences the message
  # and guards against accidental extra shared columns)
  left_join(pivot_conc, by = "ID")
# Distribution check of the assembled per-patient dataset
summary(base_prediction_auc)
##        ID             CL          dose_group              WT        
##  Min.   :   1   Min.   :0.1372   Length:4950        Min.   : 48.00  
##  1st Qu.:1238   1st Qu.:0.5634   Class :character   1st Qu.: 68.39  
##  Median :2476   Median :0.7550   Mode  :character   Median : 85.31  
##  Mean   :2476   Mean   :0.8081                      Mean   : 88.29  
##  3rd Qu.:3713   3rd Qu.:0.9864                      3rd Qu.:105.66  
##  Max.   :4950   Max.   :2.9924                      Max.   :153.00  
##       SEX            CREATCL            TEMP            dose       
##  Min.   :0.0000   Min.   : 14.06   Min.   :36.10   Min.   : 192.0  
##  1st Qu.:0.0000   1st Qu.: 64.00   1st Qu.:36.91   1st Qu.: 464.5  
##  Median :1.0000   Median : 87.42   Median :37.60   Median : 658.7  
##  Mean   :0.5851   Mean   : 84.89   Mean   :37.69   Mean   : 707.5  
##  3rd Qu.:1.0000   3rd Qu.:107.70   3rd Qu.:38.38   3rd Qu.: 896.7  
##  Max.   :1.0000   Max.   :149.81   Max.   :40.10   Max.   :1836.0  
##       auc           conc_95.5            conc_97      
##  Min.   : 121.3   Min.   :  0.01443   Min.   : 10.73  
##  1st Qu.: 550.9   1st Qu.:  4.61015   1st Qu.: 64.21  
##  Median : 860.6   Median : 11.18597   Median : 91.95  
##  Mean   :1063.4   Mean   : 18.31782   Mean   :102.73  
##  3rd Qu.:1339.5   3rd Qu.: 23.78404   3rd Qu.:130.61  
##  Max.   :7107.4   Max.   :196.40586   Max.   :447.25
#Filter 1-99 percentiles
# Trim the 1st/99th-percentile tails of the reference AUC and of both sampled
# concentrations to drop simulation extremes. The three duplicated pairs of
# quantile() calls are factored into one helper; dplyr::between() reproduces
# the original inclusive >= / <= bounds exactly.
centile_bounds <- function(x, probs = c(0.01, 0.99)) quantile(x, probs = probs)

bounds_auc <- centile_bounds(base_prediction_auc$auc)
bounds_res <- centile_bounds(base_prediction_auc$conc_95.5)  # trough (residual)
bounds_max <- centile_bounds(base_prediction_auc$conc_97)    # peak

base_prediction_auc1 <- base_prediction_auc %>%
  filter(dplyr::between(auc, bounds_auc[[1]], bounds_auc[[2]]),
         dplyr::between(conc_95.5, bounds_res[[1]], bounds_res[[2]]),
         dplyr::between(conc_97, bounds_max[[1]], bounds_max[[2]]))

# Distributions after the 1%/99% trim (n drops from 4950 to 4740)
summary(base_prediction_auc1)
##        ID             CL          dose_group              WT        
##  Min.   :   1   Min.   :0.1404   Length:4740        Min.   : 48.00  
##  1st Qu.:1271   1st Qu.:0.5675   Class :character   1st Qu.: 68.81  
##  Median :2476   Median :0.7533   Mode  :character   Median : 85.48  
##  Mean   :2475   Mean   :0.7983                      Mean   : 88.26  
##  3rd Qu.:3680   3rd Qu.:0.9736                      3rd Qu.:105.28  
##  Max.   :4949   Max.   :2.5329                      Max.   :152.93  
##       SEX            CREATCL            TEMP            dose       
##  Min.   :0.0000   Min.   : 14.24   Min.   :36.10   Min.   : 192.0  
##  1st Qu.:0.0000   1st Qu.: 64.54   1st Qu.:36.91   1st Qu.: 470.2  
##  Median :1.0000   Median : 87.50   Median :37.60   Median : 660.0  
##  Mean   :0.5819   Mean   : 85.11   Mean   :37.68   Mean   : 705.0  
##  3rd Qu.:1.0000   3rd Qu.:107.39   3rd Qu.:38.36   3rd Qu.: 887.3  
##  Max.   :1.0000   Max.   :149.81   Max.   :40.10   Max.   :1804.1  
##       auc           conc_95.5          conc_97      
##  Min.   : 208.9   Min.   : 0.1678   Min.   : 26.45  
##  1st Qu.: 564.9   1st Qu.: 4.8124   1st Qu.: 65.02  
##  Median : 866.3   Median :11.3015   Median : 92.18  
##  Mean   :1032.0   Mean   :17.3044   Mean   :101.04  
##  3rd Qu.:1318.8   3rd Qu.:23.2939   3rd Qu.:129.16  
##  Max.   :3807.0   Max.   :99.0063   Max.   :262.16
# AUC by sex (SEX: 1 = men, 0 = women, per the model covariate annotation)
base_prediction_auc1 %>% 
  ggplot(aes(x= as.factor(SEX), y = auc)) + 
  geom_boxplot() + theme_bw()

# Persist the ML-ready dataset (data.table::fwrite); read back below as ML input
fwrite(base_prediction_auc1, file = "data_ML_analysis_pred_auc_cyrielle_res0__filter_1_99_temp_281223.csv")

Machine learning approaches

# Rebuild the ML dataset from the exported csv: drop identifiers and CL (CL
# would trivially give AUC = dose/CL), rename the trough/peak concentrations,
# and add their difference as an extra predictor.
AUC_dapto <- read.csv("data_ML_analysis_pred_auc_cyrielle_res0__filter_1_99_temp_281223.csv") %>% 
  select(-ID, -dose_group, -CL) %>%
  rename(conc_C0 = conc_95.5,
         conc_C1 = conc_97) %>%
  mutate(Diff_C1C0 = conc_C1 - conc_C0)
  # mutate(auc = if_else(SEX==1, auc, auc*1.2)) # correction AUC (disabled)

# Check the re-imported ML dataset matches the exported one
summary(AUC_dapto)
##        WT              SEX            CREATCL            TEMP      
##  Min.   : 48.00   Min.   :0.0000   Min.   : 14.24   Min.   :36.10  
##  1st Qu.: 68.81   1st Qu.:0.0000   1st Qu.: 64.54   1st Qu.:36.91  
##  Median : 85.48   Median :1.0000   Median : 87.50   Median :37.60  
##  Mean   : 88.26   Mean   :0.5819   Mean   : 85.11   Mean   :37.68  
##  3rd Qu.:105.28   3rd Qu.:1.0000   3rd Qu.:107.39   3rd Qu.:38.36  
##  Max.   :152.93   Max.   :1.0000   Max.   :149.81   Max.   :40.10  
##       dose             auc            conc_C0           conc_C1      
##  Min.   : 192.0   Min.   : 208.9   Min.   : 0.1678   Min.   : 26.45  
##  1st Qu.: 470.2   1st Qu.: 564.9   1st Qu.: 4.8124   1st Qu.: 65.02  
##  Median : 660.0   Median : 866.3   Median :11.3015   Median : 92.18  
##  Mean   : 705.0   Mean   :1032.0   Mean   :17.3044   Mean   :101.04  
##  3rd Qu.: 887.3   3rd Qu.:1318.8   3rd Qu.:23.2939   3rd Qu.:129.16  
##  Max.   :1804.1   Max.   :3807.0   Max.   :99.0063   Max.   :262.16  
##    Diff_C1C0     
##  Min.   : 12.45  
##  1st Qu.: 53.51  
##  Median : 76.58  
##  Mean   : 83.73  
##  3rd Qu.:108.19  
##  Max.   :246.91
#AUC_dapto %>% ggplot(aes(x = as.factor(SEX), y = dose/auc)) + geom_boxplot()
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Pairwise scatterplots / correlations of all numeric variables (Figure 1)
AUC_dapto %>% select_if(is.numeric) %>% ggpairs()

ggsave("Figure_1.pdf")
## Saving 7 x 5 in image
# 75/25 train/test split, stratified on the outcome so both sets cover the
# full AUC range
set.seed(1234)
dapto_split<- initial_split(AUC_dapto, 
                            strata = auc, 
                            prop=3/4)

dapto_ml_train  <- training(dapto_split )
dapto_ml_test  <- testing(dapto_split )

# Define the (currently step-less) recipe on the TRAINING data only: the
# original used the full AUC_dapto as template, which (a) would leak test
# information if data-estimated preprocessing steps were added later and
# (b) made `dapto_train_recipe` below contain all 4740 rows, not the
# training split. With no steps, model fits are unaffected.
dapto_ml_rec  <- recipe(auc ~ ., data = dapto_ml_train) 

dapto_ml_rec_prep <-  prep(dapto_ml_rec )
dapto_train_recipe <-bake(dapto_ml_rec_prep, new_data = NULL)
dapto_test_recipe <-bake(dapto_ml_rec_prep, new_data = dapto_ml_test)
# Training-split distributions (compare with the test split below)
summary(dapto_ml_train)
##        WT              SEX           CREATCL            TEMP      
##  Min.   : 48.00   Min.   :0.000   Min.   : 14.25   Min.   :36.10  
##  1st Qu.: 68.74   1st Qu.:0.000   1st Qu.: 64.68   1st Qu.:36.90  
##  Median : 85.68   Median :1.000   Median : 87.48   Median :37.61  
##  Mean   : 88.45   Mean   :0.578   Mean   : 85.15   Mean   :37.69  
##  3rd Qu.:106.12   3rd Qu.:1.000   3rd Qu.:107.37   3rd Qu.:38.37  
##  Max.   :152.93   Max.   :1.000   Max.   :149.81   Max.   :40.10  
##       dose             auc            conc_C0           conc_C1      
##  Min.   : 192.0   Min.   : 208.9   Min.   : 0.1678   Min.   : 26.58  
##  1st Qu.: 473.0   1st Qu.: 564.9   1st Qu.: 4.8654   1st Qu.: 65.12  
##  Median : 661.0   Median : 866.3   Median :11.3590   Median : 92.38  
##  Mean   : 705.0   Mean   :1033.1   Mean   :17.4131   Mean   :100.77  
##  3rd Qu.: 882.4   3rd Qu.:1318.8   3rd Qu.:23.4540   3rd Qu.:128.36  
##  Max.   :1804.1   Max.   :3807.0   Max.   :99.0063   Max.   :261.68  
##    Diff_C1C0     
##  Min.   : 15.69  
##  1st Qu.: 53.89  
##  Median : 76.68  
##  Mean   : 83.35  
##  3rd Qu.:106.98  
##  Max.   :246.91
# Test-split distributions — comparable to the training split, as expected
# from the stratified split
summary(dapto_ml_test)
##        WT              SEX            CREATCL            TEMP      
##  Min.   : 48.00   Min.   :0.0000   Min.   : 14.24   Min.   :36.10  
##  1st Qu.: 69.23   1st Qu.:0.0000   1st Qu.: 64.12   1st Qu.:36.92  
##  Median : 85.12   Median :1.0000   Median : 87.70   Median :37.56  
##  Mean   : 87.69   Mean   :0.5934   Mean   : 85.00   Mean   :37.67  
##  3rd Qu.:103.92   3rd Qu.:1.0000   3rd Qu.:107.53   3rd Qu.:38.34  
##  Max.   :150.77   Max.   :1.0000   Max.   :149.79   Max.   :40.09  
##       dose             auc            conc_C0           conc_C1      
##  Min.   : 196.0   Min.   : 212.6   Min.   : 0.1965   Min.   : 26.45  
##  1st Qu.: 465.3   1st Qu.: 566.1   1st Qu.: 4.6958   1st Qu.: 64.83  
##  Median : 655.3   Median : 866.7   Median :11.2225   Median : 91.24  
##  Mean   : 704.8   Mean   :1028.7   Mean   :16.9794   Mean   :101.84  
##  3rd Qu.: 895.9   3rd Qu.:1318.6   3rd Qu.:22.4862   3rd Qu.:132.93  
##  Max.   :1760.0   Max.   :3590.9   Max.   :94.1176   Max.   :262.16  
##    Diff_C1C0     
##  Min.   : 12.45  
##  1st Qu.: 51.82  
##  Median : 76.18  
##  Mean   : 84.86  
##  3rd Qu.:110.74  
##  Max.   :240.79

Xgboost

# XGBoost regression specification with all six main hyperparameters tuned
xgb_spec <- boost_tree(mtry = tune(),
                       trees = tune(),
                       min_n = tune(),
                       sample_size = tune(),
                       tree_depth = tune(),
                       learn_rate = tune()) %>% 
  set_mode("regression") %>%
  set_engine("xgboost")

# Workflow = step-less recipe (formula + data template) + tunable spec
xgb_wf <- workflow() %>% 
  add_model(xgb_spec) %>% 
  add_recipe(dapto_ml_rec)

#hyperparameters
# 10-fold CV on the training set, stratified on the outcome, for tuning
set.seed(2345)
folds <- vfold_cv(dapto_ml_train, strata = auc)

library(finetune)
# Racing (ANOVA-based) hyperparameter search: a 60-candidate space-filling
# grid is evaluated fold by fold, and clearly inferior candidates are
# eliminated early instead of being run on every resample. RMSE only.
set.seed(234)
tune_xgb_ft <- tune_race_anova(xgb_wf,
                               folds,grid = 60,
                               metrics = metric_set(rmse),
                               control = control_race(verbose_elim = TRUE))
## ℹ Racing will minimize the rmse metric.
## ℹ Resamples are analyzed in a random order.
## ℹ Fold10: 27 eliminated; 33 candidates remain.
## 
## ℹ Fold06: 22 eliminated; 11 candidates remain.
## 
## ℹ Fold08: 2 eliminated; 9 candidates remain.
## 
## ℹ Fold01: 2 eliminated; 7 candidates remain.
## 
## ℹ Fold04: 4 eliminated; 3 candidates remain.
## 
## ℹ Fold02: 1 eliminated; 2 candidates remain.
## 
## ℹ Fold09: 0 eliminated; 2 candidates remain.
# Best surviving candidates by mean CV RMSE
show_best(tune_xgb_ft)
## # A tibble: 2 × 12
##    mtry trees min_n tree_depth learn_rate sample_size .metric .estimator  mean
##   <int> <int> <int>      <int>      <dbl>       <dbl> <chr>   <chr>      <dbl>
## 1     7  1407     4          6    0.00954       0.730 rmse    standard    78.1
## 2     7  1743     7          7    0.00283       0.699 rmse    standard    78.8
## # ℹ 3 more variables: n <int>, std_err <dbl>, .config <chr>
# Lock in the RMSE-best hyperparameter set
best_rmse_xgb <- select_best(tune_xgb_ft)

final_xgb <- finalize_model(xgb_spec,best_rmse_xgb)
final_xgb
## Boosted Tree Model Specification (regression)
## 
## Main Arguments:
##   mtry = 7
##   trees = 1407
##   min_n = 4
##   tree_depth = 6
##   learn_rate = 0.00954350456228848
##   sample_size = 0.730449724162463
## 
## Computational engine: xgboost
# Variable-importance plot on the prepped training template.
# `importance = "permutation"` was removed: it is not an xgboost training
# parameter and only produced the recorded warning
# 'Parameters: { "importance" } are not used'; vip() reads importance from
# the fitted booster itself.
final_xgb %>% set_engine("xgboost") %>%
  fit(auc ~ ., data = juice(dapto_ml_rec_prep)) %>%
  vip::vip(geom = "col") + theme_bw()

ggsave("Figure_3.pdf")
## Saving 7 x 5 in image
# Finalized (tune()-free) workflow with the selected hyperparameters
final_wf_xgb <- workflow() %>% 
  add_recipe(dapto_ml_rec) %>% 
  add_model(final_xgb)

#resample
# Fresh 10-fold CV (different seed from tuning) to estimate performance of
# the finalized workflow
set.seed(456)
folds_cv <- vfold_cv(dapto_ml_train, strata = auc)

#10 fold CV
# Fit/evaluate on each fold, keeping out-of-fold predictions for the bias
# analysis below. NOTE(review): prefer TRUE over T (T is reassignable).
xgb_rs<- fit_resamples(object = final_wf_xgb, 
                       resamples = folds_cv, 
                       control = control_resamples(verbose=T, save_pred = T))
## i Fold01: preprocessor 1/1
## ✓ Fold01: preprocessor 1/1
## i Fold01: preprocessor 1/1, model 1/1
## ✓ Fold01: preprocessor 1/1, model 1/1
## i Fold01: preprocessor 1/1, model 1/1 (extracts)
## i Fold01: preprocessor 1/1, model 1/1 (predictions)
## i Fold02: preprocessor 1/1
## ✓ Fold02: preprocessor 1/1
## i Fold02: preprocessor 1/1, model 1/1
## ✓ Fold02: preprocessor 1/1, model 1/1
## i Fold02: preprocessor 1/1, model 1/1 (extracts)
## i Fold02: preprocessor 1/1, model 1/1 (predictions)
## i Fold03: preprocessor 1/1
## ✓ Fold03: preprocessor 1/1
## i Fold03: preprocessor 1/1, model 1/1
## ✓ Fold03: preprocessor 1/1, model 1/1
## i Fold03: preprocessor 1/1, model 1/1 (extracts)
## i Fold03: preprocessor 1/1, model 1/1 (predictions)
## i Fold04: preprocessor 1/1
## ✓ Fold04: preprocessor 1/1
## i Fold04: preprocessor 1/1, model 1/1
## ✓ Fold04: preprocessor 1/1, model 1/1
## i Fold04: preprocessor 1/1, model 1/1 (extracts)
## i Fold04: preprocessor 1/1, model 1/1 (predictions)
## i Fold05: preprocessor 1/1
## ✓ Fold05: preprocessor 1/1
## i Fold05: preprocessor 1/1, model 1/1
## ✓ Fold05: preprocessor 1/1, model 1/1
## i Fold05: preprocessor 1/1, model 1/1 (extracts)
## i Fold05: preprocessor 1/1, model 1/1 (predictions)
## i Fold06: preprocessor 1/1
## ✓ Fold06: preprocessor 1/1
## i Fold06: preprocessor 1/1, model 1/1
## ✓ Fold06: preprocessor 1/1, model 1/1
## i Fold06: preprocessor 1/1, model 1/1 (extracts)
## i Fold06: preprocessor 1/1, model 1/1 (predictions)
## i Fold07: preprocessor 1/1
## ✓ Fold07: preprocessor 1/1
## i Fold07: preprocessor 1/1, model 1/1
## ✓ Fold07: preprocessor 1/1, model 1/1
## i Fold07: preprocessor 1/1, model 1/1 (extracts)
## i Fold07: preprocessor 1/1, model 1/1 (predictions)
## i Fold08: preprocessor 1/1
## ✓ Fold08: preprocessor 1/1
## i Fold08: preprocessor 1/1, model 1/1
## ✓ Fold08: preprocessor 1/1, model 1/1
## i Fold08: preprocessor 1/1, model 1/1 (extracts)
## i Fold08: preprocessor 1/1, model 1/1 (predictions)
## i Fold09: preprocessor 1/1
## ✓ Fold09: preprocessor 1/1
## i Fold09: preprocessor 1/1, model 1/1
## ✓ Fold09: preprocessor 1/1, model 1/1
## i Fold09: preprocessor 1/1, model 1/1 (extracts)
## i Fold09: preprocessor 1/1, model 1/1 (predictions)
## i Fold10: preprocessor 1/1
## ✓ Fold10: preprocessor 1/1
## i Fold10: preprocessor 1/1, model 1/1
## ✓ Fold10: preprocessor 1/1, model 1/1
## i Fold10: preprocessor 1/1, model 1/1 (extracts)
## i Fold10: preprocessor 1/1, model 1/1 (predictions)
#perf resample
# Mean CV metrics (rmse, rsq) across the 10 folds
xgb_rs %>% collect_metrics()
## # A tibble: 2 × 6
##   .metric .estimator   mean     n  std_err .config             
##   <chr>   <chr>       <dbl> <int>    <dbl> <chr>               
## 1 rmse    standard   77.5      10 1.89     Preprocessor1_Model1
## 2 rsq     standard    0.985    10 0.000895 Preprocessor1_Model1
# Out-of-fold predictions, plus raw and relative prediction errors
# (biais_brut = raw bias; column names kept for downstream code)
xgb_pred_obs <- collect_predictions(xgb_rs)

pred_obs_xgb <- xgb_pred_obs %>%
  mutate(biais_brut = .pred - auc,
         bias_rel = biais_brut / auc,
         bias_rel_square = bias_rel^2)

# CV performance summary: mean raw bias, mean relative bias, relative RMSE,
# and the share/count of predictions falling outside +/-20% of the reference
# AUC ("biais" is French for bias; names kept as-is)
pred_obs_xgb%>% 
  summarise(biais_brut = mean(biais_brut),
            biais_rel = mean(bias_rel),
            relative_rmse = sqrt(mean(bias_rel_square)),
            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
            n= n())
## # A tibble: 1 × 6
##   biais_brut biais_rel relative_rmse biais_out_20percent nb_out_20percent     n
##        <dbl>     <dbl>         <dbl>               <dbl>            <int> <int>
## 1     0.0482   0.00643        0.0783              0.0239               85  3552
# Attach training covariates by row index rather than by the continuous `auc`
# value: joining on a floating-point outcome can fan out on ties and mismatch
# rows. collect_predictions() carries `.row`, which indexes the rows of the
# data given to vfold_cv() (dapto_ml_train). suffix keeps the predictions'
# `auc` column name unchanged for the plots below.
Train_pred_obs <- xgb_pred_obs %>% 
  left_join(dapto_ml_train %>% mutate(.row = row_number()),
            by = ".row", suffix = c("", "_train")) #join to get pred/obs AUC and SEX

# Panel (A): CV predicted vs reference AUC, colored by sex.
# Axis mapping fixed: the original mapped x = .pred / y = auc while labeling
# x as "Reference AUC" — now x is the reference and y the prediction,
# matching the labels and the orientation of panel (B).
Figure_2a <- Train_pred_obs %>%
  ggplot(mapping = aes(x = auc, y = .pred, color= as.factor(SEX))) + 
  geom_point() +
  geom_smooth(method=lm, color="black") + 
  labs(x="Reference AUC (mg*h/L)", y= "Predicted daptomycin AUC (mg*h/L)",
       title="(A)", color="Sex") + 
  theme_bw()
Figure_2a
## `geom_smooth()` using formula = 'y ~ x'

# Panel (C): CV residuals (reference - predicted) against the reference AUC
Figure_2c <- xgb_pred_obs%>%
  ggplot(mapping = aes(x = auc, y = auc - .pred)) +
  geom_point() + geom_smooth(method=lm) +
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", 
       title="(C)") + 
  theme_bw()
Figure_2c
## `geom_smooth()` using formula = 'y ~ x'

# Fit the finalized workflow on the full training split and persist it
fit_workflow_xgb <- fit(final_wf_xgb, dapto_ml_train)
saveRDS(fit_workflow_xgb, file = "auc_daptomycin_xgboost_1_99_res0_temp_filter.rds")

#Last fit: train on the training split, evaluate once on the held-out test set.
# `type = "conf_int"` was dropped — last_fit() has no such argument; it was
# swallowed by `...` and produced the recorded warning.
final_res <- final_wf_xgb %>%
  last_fit(dapto_split)
#Performances biais imprecision test
# Single held-out evaluation metrics (rmse, rsq) on the test split
final_res %>% collect_metrics()
## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard      79.6   Preprocessor1_Model1
## 2 rsq     standard       0.984 Preprocessor1_Model1
# Test-set predictions with raw and relative errors (same definitions as the
# CV bias analysis above)
final_res_predictions <- final_res %>% collect_predictions() %>%
  rename(AUC_pred = .pred) %>%
  mutate (biais_brut = AUC_pred - auc,
          bias_rel = (AUC_pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Test-set bias summary rendered as a paged table (for the knitted report)
rmarkdown::paged_table(
  as.data.frame(final_res_predictions %>% 
                  summarise(biais_brut= mean(biais_brut),
                            biais_rel = mean(bias_rel), 
                            relative_rmse = sqrt(mean(bias_rel_square)),
                            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            n= n())))
# Join covariates back by row index instead of by the continuous `auc` value
# (float joins can fan out on ties). last_fit() predictions carry `.row`,
# indexing rows of the data passed to initial_split() (AUC_dapto); suffix
# keeps the predictions' own `auc` column name unchanged for the plots.
Test_pred_obs <- final_res_predictions %>%
  left_join(AUC_dapto %>% mutate(.row = row_number()),
            by = ".row", suffix = c("", "_ref"))

#Prediction scatterplot
# Panel (B): test-set predicted vs reference AUC, colored by sex
Figure_2b <- Test_pred_obs %>%
  ggplot(mapping = aes(x = auc, y = AUC_pred, color=as.factor(SEX))) +
  geom_point() +
  geom_smooth(method=lm,color = "black")+
  labs(x = "Reference AUC (mg*h/L)", y = "Predicted daptomycin AUC (mg*h/L)", 
       title="(B)", color ="Sex") +  
  theme_bw()
Figure_2b
## `geom_smooth()` using formula = 'y ~ x'

# Panel (D): test-set residuals against the reference AUC
Figure_2d <- final_res_predictions %>%
  ggplot(mapping = aes(x = auc, y = auc - AUC_pred)) +
  geom_point() +
  geom_smooth(method=lm)+
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(D)") + 
  theme_bw()
Figure_2d
## `geom_smooth()` using formula = 'y ~ x'

library(patchwork)
#wrap_plots(Figure_2a, Figure_2b, ncol=1) #+ Figure_2c + Figure_2d 
#ggsave("Figure_2.pdf")

# Stack panels (A) and (B) vertically and export at fixed size
layout <- wrap_plots(Figure_2a, Figure_2b, ncol = 1)
ggsave("Figure_2.pdf", plot = layout, width = 15, height = 18, units = "cm")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Dichotomize reference and predicted AUC at 666 mg*h/L and cross-tabulate
# agreement on the test set. `mutate(across(...))` replaces the superseded
# scoped verb mutate_if(); behavior is identical.
table_contingence_666 <- final_res_predictions %>% 
  mutate(event_auc = if_else(auc<=666,"inf_666","sup_666"), 
         event_xgb = if_else(AUC_pred<=666,"inf_666","sup_666")) %>% 
  mutate(across(where(is.character), factor)) 

table_contingence_666 %>% 
  conf_mat(truth = event_auc, estimate = event_xgb)
##           Truth
## Prediction inf_666 sup_666
##    inf_666     390      18
##    sup_666      17     763
# Sensitivity, specifity, negative/positive predictive values
# True P >666 (event_level = "second" makes "sup_666" the positive class)
table_contingence_666 %>% yardstick::sens(event_auc, event_xgb, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 sens    binary         0.977
table_contingence_666 %>% yardstick::spec(event_auc, event_xgb, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 spec    binary         0.958
table_contingence_666 %>% yardstick::ppv(event_auc, event_xgb, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 ppv     binary         0.978
table_contingence_666 %>% yardstick::npv(event_auc, event_xgb, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 npv     binary         0.956
# Same agreement analysis at the 939 mg*h/L threshold. Here the default
# event_level ("first", i.e. inf_939) is used, consistent with the
# "True P <939" comment. across() replaces superseded mutate_if().
table_contingence_939 <- final_res_predictions %>% 
  mutate(event_auc = if_else(auc<=939,"inf_939","sup_939"), 
         event_xgb = if_else(AUC_pred<=939,"inf_939","sup_939")) %>% 
  mutate(across(where(is.character), factor)) 

table_contingence_939 %>% 
  conf_mat(truth = event_auc, estimate = event_xgb) 
##           Truth
## Prediction inf_939 sup_939
##    inf_939     625      32
##    sup_939      31     500
# True P <939
table_contingence_939 %>% yardstick::sens(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 sens    binary         0.953
table_contingence_939 %>% yardstick::spec(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 spec    binary         0.940
table_contingence_939 %>% yardstick::ppv(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 ppv     binary         0.951
table_contingence_939 %>% yardstick::npv(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 npv     binary         0.942
# AUC target between 666-939
# AUC target between 666-939
# Agreement on hitting the 666-939 mg*h/L target window (inclusive bounds via
# dplyr::between). across() replaces superseded mutate_if().
table_contingence_666_939 <- final_res_predictions %>% 
  mutate(event_auc = if_else(dplyr::between(auc, 666,939),"auc_666_939","auc_out"), 
         event_xgb = if_else(dplyr::between(AUC_pred, 666,939),"auc_666_939","auc_out")) %>% 
  mutate(across(where(is.character), factor)) 

table_contingence_666_939 %>% 
  conf_mat(truth = event_auc, estimate = event_xgb) 
##              Truth
## Prediction    auc_666_939 auc_out
##   auc_666_939         200      49
##   auc_out              49     890
# True P >666 & <939
table_contingence_666_939 %>% yardstick::sens(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 sens    binary         0.803
table_contingence_666_939 %>% yardstick::spec(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 spec    binary         0.948
table_contingence_666_939 %>% yardstick::ppv(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 ppv     binary         0.803
table_contingence_666_939 %>% yardstick::npv(event_auc, event_xgb)#, event_level="second", estimator = "binary")
## # A tibble: 1 × 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 npv     binary         0.948
## creation of explainer
# DALEX explainer wrapping the fitted tidymodels workflow; `y` carries the
# observed AUC so residuals can be computed.
# NOTE(review): `data` includes the outcome column `auc` — confirm this is
# intended for the explanation methods used below.
explainer_external <- explain_tidymodels(
  model = fit_workflow_xgb, 
  data = dapto_ml_train, 
  y = dapto_ml_train$auc,
  label = "xgb")
## Preparation of a new explainer is initiated
##   -> model label       :  xgb 
##   -> data              :  3552  rows  9  cols 
##   -> target variable   :  3552  values 
##   -> predict function  :  yhat.workflow  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package tidymodels , ver. 1.1.1 , task regression (  default  ) 
##   -> predicted values  :  numerical, min =  211.9469 , mean =  1033.047 , max =  3775.979  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -142.9698 , mean =  0.02628012 , max =  146.3696  
##   A new explainer has been created!
saveRDS(explainer_external, file ="explainer_external.rds" )
# Break-down plot for one randomly drawn test patient: additive contribution
# of each covariate to that patient's predicted AUC (top 5 features shown)
set.seed(1234587)
dapto_bd <- dapto_ml_test %>% slice_sample(n=1)
bd_plot <- predict_parts(explainer = explainer_external,
                       new_observation = dapto_bd,
                       type = "break_down")
plot(bd_plot, max_features = 5)

# Show the sampled patient's covariates and reference AUC
dapto_bd
##         WT SEX  CREATCL     TEMP     dose     auc  conc_C0  conc_C1 Diff_C1C0
## 1 73.52216   1 57.68603 38.05576 735.2216 749.806 5.798273 115.2831  109.4848
# Same break-down after augment() appends the model prediction (.pred) to the
# observation. NOTE(review): presumably the extra .pred column is ignored by
# predict_parts() — verify; otherwise the two plots should be identical.
dapto_bd1 <- tune::augment(fit_workflow_xgb, dapto_bd)
bd_plot <- predict_parts(explainer = explainer_external,
                       new_observation = dapto_bd1,
                       type = "break_down")
plot(bd_plot, max_features = 5)

Glmnet

# Elastic-net linear regression (glmnet), tuning both the penalty (lambda)
# and the mixture (alpha, ridge-lasso blend)
glm_spec <- linear_reg(penalty = tune(),
                       mixture = tune()) %>% 
  set_mode("regression") %>%
  set_engine("glmnet")

# Same step-less recipe as the XGBoost workflow
glm_wf <- workflow() %>% 
  add_model(glm_spec) %>% 
  add_recipe(dapto_ml_rec)

#hyperparameters
# 10-fold CV stratified on the outcome, used for hyperparameter racing.
set.seed(2345)
folds <- vfold_cv(dapto_ml_train, strata = auc)

set.seed(234)
# ANOVA racing over 60 penalty/mixture candidates, minimizing RMSE.
# NOTE(review): tune_race_anova() is from the finetune package, which is not
# attached in the preamble shown here -- confirm it is loaded elsewhere.
tune_glm_ft <- tune_race_anova(glm_wf,
                               folds,grid = 60,
                               metrics = metric_set(rmse),
                               control = control_race(verbose_elim = TRUE))
## ℹ Racing will minimize the rmse metric.
## ℹ Resamples are analyzed in a random order.
## ℹ Fold10: 43 eliminated; 17 candidates remain.
## 
## ℹ Fold06: 9 eliminated; 8 candidates remain.
## 
## ℹ Fold08: 0 eliminated; 8 candidates remain.
## 
## ℹ Fold01: 0 eliminated; 8 candidates remain.
## 
## ℹ Fold04: 0 eliminated; 8 candidates remain.
## 
## ℹ Fold02: 0 eliminated; 8 candidates remain.
## 
## ℹ Fold09: 0 eliminated; 8 candidates remain.
# Top 5 candidates by cross-validated RMSE. The metric is named explicitly:
# relying on the default emits a "no metric given" message in current tune.
show_best(tune_glm_ft, metric = "rmse")
## # A tibble: 5 × 8
##         penalty mixture .metric .estimator  mean     n std_err .config          
##           <dbl>   <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>            
## 1 0.00000528     0.0767 rmse    standard    88.3    10   0.968 Preprocessor1_Mo…
## 2 0.00000000224  0.0605 rmse    standard    88.3    10   0.968 Preprocessor1_Mo…
## 3 0.0000765      0.0886 rmse    standard    88.3    10   0.969 Preprocessor1_Mo…
## 4 0.000000665    0.118  rmse    standard    88.3    10   0.970 Preprocessor1_Mo…
## 5 0.706          0.172  rmse    standard    88.3    10   0.968 Preprocessor1_Mo…
# Keep the single best penalty/mixture combination (lowest CV RMSE).
best_rmse_glm <- select_best(tune_glm_ft, metric = "rmse")

final_glm <- finalize_model(glm_spec, best_rmse_glm)
final_glm
## Linear Regression Model Specification (regression)
## 
## Main Arguments:
##   penalty = 5.27668952808935e-06
##   mixture = 0.0766816805112952
## 
## Computational engine: glmnet
# Variable importance on the preprocessed training data.
# Fix: the original passed importance = "permutation" to set_engine("glmnet");
# that is a ranger engine option copy-pasted from the random-forest section and
# means nothing to glmnet, so it is dropped (vip ranks glmnet terms by the
# absolute value of their coefficients).
final_glm %>%
  fit(auc ~ ., data = juice(dapto_ml_rec_prep)) %>%
  vip::vip(geom = "col") + theme_bw()

# Final tuned workflow: shared recipe + finalized elastic net.
final_wf_glm <- workflow() %>% 
  add_recipe(dapto_ml_rec) %>% 
  add_model(final_glm)

#resample
set.seed(456)
folds_cv <- vfold_cv(dapto_ml_train, strata = auc)

#10 fold CV
# Re-estimate performance of the finalized model by 10-fold CV, keeping the
# out-of-fold predictions for the bias/imprecision summaries below.
glm_rs<- fit_resamples(object = final_wf_glm, 
                       resamples = folds_cv, 
                       control = control_resamples(verbose=T, save_pred = T))
## i Fold01: preprocessor 1/1
## ✓ Fold01: preprocessor 1/1
## i Fold01: preprocessor 1/1, model 1/1
## ✓ Fold01: preprocessor 1/1, model 1/1
## i Fold01: preprocessor 1/1, model 1/1 (extracts)
## i Fold01: preprocessor 1/1, model 1/1 (predictions)
## i Fold02: preprocessor 1/1
## ✓ Fold02: preprocessor 1/1
## i Fold02: preprocessor 1/1, model 1/1
## ✓ Fold02: preprocessor 1/1, model 1/1
## i Fold02: preprocessor 1/1, model 1/1 (extracts)
## i Fold02: preprocessor 1/1, model 1/1 (predictions)
## i Fold03: preprocessor 1/1
## ✓ Fold03: preprocessor 1/1
## i Fold03: preprocessor 1/1, model 1/1
## ✓ Fold03: preprocessor 1/1, model 1/1
## i Fold03: preprocessor 1/1, model 1/1 (extracts)
## i Fold03: preprocessor 1/1, model 1/1 (predictions)
## i Fold04: preprocessor 1/1
## ✓ Fold04: preprocessor 1/1
## i Fold04: preprocessor 1/1, model 1/1
## ✓ Fold04: preprocessor 1/1, model 1/1
## i Fold04: preprocessor 1/1, model 1/1 (extracts)
## i Fold04: preprocessor 1/1, model 1/1 (predictions)
## i Fold05: preprocessor 1/1
## ✓ Fold05: preprocessor 1/1
## i Fold05: preprocessor 1/1, model 1/1
## ✓ Fold05: preprocessor 1/1, model 1/1
## i Fold05: preprocessor 1/1, model 1/1 (extracts)
## i Fold05: preprocessor 1/1, model 1/1 (predictions)
## i Fold06: preprocessor 1/1
## ✓ Fold06: preprocessor 1/1
## i Fold06: preprocessor 1/1, model 1/1
## ✓ Fold06: preprocessor 1/1, model 1/1
## i Fold06: preprocessor 1/1, model 1/1 (extracts)
## i Fold06: preprocessor 1/1, model 1/1 (predictions)
## i Fold07: preprocessor 1/1
## ✓ Fold07: preprocessor 1/1
## i Fold07: preprocessor 1/1, model 1/1
## ✓ Fold07: preprocessor 1/1, model 1/1
## i Fold07: preprocessor 1/1, model 1/1 (extracts)
## i Fold07: preprocessor 1/1, model 1/1 (predictions)
## i Fold08: preprocessor 1/1
## ✓ Fold08: preprocessor 1/1
## i Fold08: preprocessor 1/1, model 1/1
## ✓ Fold08: preprocessor 1/1, model 1/1
## i Fold08: preprocessor 1/1, model 1/1 (extracts)
## i Fold08: preprocessor 1/1, model 1/1 (predictions)
## i Fold09: preprocessor 1/1
## ✓ Fold09: preprocessor 1/1
## i Fold09: preprocessor 1/1, model 1/1
## ✓ Fold09: preprocessor 1/1, model 1/1
## i Fold09: preprocessor 1/1, model 1/1 (extracts)
## i Fold09: preprocessor 1/1, model 1/1 (predictions)
## i Fold10: preprocessor 1/1
## ✓ Fold10: preprocessor 1/1
## i Fold10: preprocessor 1/1, model 1/1
## ✓ Fold10: preprocessor 1/1, model 1/1
## i Fold10: preprocessor 1/1, model 1/1 (extracts)
## i Fold10: preprocessor 1/1, model 1/1 (predictions)
#perf resample
glm_rs %>% collect_metrics()
## # A tibble: 2 × 6
##   .metric .estimator   mean     n  std_err .config             
##   <chr>   <chr>       <dbl> <int>    <dbl> <chr>               
## 1 rmse    standard   88.2      10 1.68     Preprocessor1_Model1
## 2 rsq     standard    0.981    10 0.000990 Preprocessor1_Model1
glm_pred_obs <- glm_rs %>% collect_predictions()

# Per-prediction bias: raw (pred - obs), relative, and squared relative.
pred_obs_glm <- glm_rs %>% collect_predictions() %>%
  mutate (biais_brut = .pred - auc,
          bias_rel = (.pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate bias/imprecision: mean bias, relative RMSE, and the share/count of
# predictions falling outside +/-20% of the reference AUC.
pred_obs_glm %>% 
  summarise(biais_brut = mean(biais_brut),
            biais_rel = mean(bias_rel),
            relative_rmse = sqrt(mean(bias_rel_square)),
            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
            n= n())
## # A tibble: 1 × 6
##   biais_brut biais_rel relative_rmse biais_out_20percent nb_out_20percent     n
##        <dbl>     <dbl>         <dbl>               <dbl>            <int> <int>
## 1  -0.000575    0.0125         0.108              0.0501              178  3552
# Attach covariates (e.g. SEX) to the CV predictions for plotting.
# NOTE(review): joining on the continuous outcome `auc` fans out rows if two
# patients share an identical AUC -- joining on `.row` would be safer; confirm.
Train_pred_obs_glm <- glm_pred_obs %>% 
  left_join(dapto_ml_train, by="auc") #join in order to have pred and obs AUC and SEX

Figure_2a_glmet <- Train_pred_obs_glm %>%
  ggplot(mapping = aes(x = auc, y = .pred, color= as.factor(SEX))) + 
  geom_point() +
  geom_smooth(method=lm, color="black") + 
  labs(x= "Reference AUC  (mg*h/L)", y="Predicted daptomycin AUC(mg*h/L)", 
       title="(A)", color="Sex") + 
  theme_bw()
Figure_2a_glmet
## `geom_smooth()` using formula = 'y ~ x'

# Residual plot (reference - predicted) over the CV predictions.
Figure_2c_glmet <- glm_pred_obs %>%
  ggplot(mapping = aes(x = auc, y = auc - .pred)) +
  geom_point() + geom_smooth(method=lm) +
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(C)") + 
  theme_bw()
Figure_2c_glmet
## `geom_smooth()` using formula = 'y ~ x'

# Fit on the full training set and persist the workflow for reuse.
fit_workflow_glm <- fit(final_wf_glm, dapto_ml_train)
saveRDS(fit_workflow_glm, file = "auc_daptomycin_glmnet_1_99_res0_temp_filter.rds")

#Last fit
# Fit on the full training split, evaluate once on the held-out test split.
# Fix: last_fit() has no `type` argument -- the stray `type = "conf_int"` was
# swallowed by `...` (it only triggered an "unused `...`" warning), so removed.
final_res_glmet <- final_wf_glm %>%
  last_fit(dapto_split)
#Performances biais imprecision test
final_res_glmet %>% collect_metrics()
## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard      86.2   Preprocessor1_Model1
## 2 rsq     standard       0.981 Preprocessor1_Model1
# Test-set predictions with the same bias metrics as computed in CV.
final_res_predictions_glmet <- final_res_glmet %>% collect_predictions() %>%
  rename(AUC_pred = .pred) %>%
  mutate (biais_brut = AUC_pred - auc,
          bias_rel = (AUC_pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate test-set bias/imprecision table rendered for the report.
rmarkdown::paged_table(
  as.data.frame(final_res_predictions_glmet %>% 
                  summarise(biais_brut= mean(biais_brut),
                            biais_rel = mean(bias_rel), 
                            relative_rmse = sqrt(mean(bias_rel_square)),
                            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            n= n())))
# NOTE(review): joining on the continuous `auc` can fan out rows on ties;
# joining on `.row` would be safer -- confirm.
Test_pred_obs_glmet <- final_res_predictions_glmet %>% left_join(dapto_ml_test, by="auc")

#Prediction scatterplot
Figure_2b_glmet <- Test_pred_obs_glmet %>%
  ggplot(mapping = aes(x = auc, y = AUC_pred, color=as.factor(SEX))) +
  geom_point() +
  geom_smooth(method=lm,color = "black")+
  labs(x = "Reference AUC (mg*h/L)", y = "Predicted daptomycin AUC (mg*h/L)", 
       title="(B)", color ="Sex") +  
  theme_bw()
Figure_2b_glmet
## `geom_smooth()` using formula = 'y ~ x'

# Test-set residual plot (reference - predicted).
Figure_2d_glmet <- final_res_predictions_glmet %>%
  ggplot(mapping = aes(x = auc, y = auc - AUC_pred)) +
  geom_point() +
  geom_smooth(method=lm)+
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(D)") + 
  theme_bw()
Figure_2d_glmet
## `geom_smooth()` using formula = 'y ~ x'

# Classification view of the regression predictions: dichotomize reference and
# predicted AUC at the 666 mg*h/L efficacy threshold and cross-tabulate.
# The superseded mutate_if() is replaced by the modern across(where()) idiom
# (identical behavior: every character column becomes a factor).
final_res_predictions_glmet %>% 
  mutate(event_auc = if_else(auc <= 666, "inf_666", "sup_666"), 
         event_glm = if_else(AUC_pred <= 666, "inf_666", "sup_666")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_glm) 
##           Truth
## Prediction inf_666 sup_666
##    inf_666     378      13
##    sup_666      29     768
# Same at the 939 mg*h/L toxicity threshold.
final_res_predictions_glmet %>% 
  mutate(event_auc = if_else(auc <= 939, "inf_939", "sup_939"), 
         event_glm = if_else(AUC_pred <= 939, "inf_939", "sup_939")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_glm) 
##           Truth
## Prediction inf_939 sup_939
##    inf_939     628      33
##    sup_939      28     499
# AUC target between 666-939
final_res_predictions_glmet %>% 
  mutate(event_auc = if_else(dplyr::between(auc, 666, 939), "auc_666_939", "auc_out"), 
         event_glm = if_else(dplyr::between(AUC_pred, 666, 939), "auc_666_939", "auc_out")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_glm) 
# Stack panels A and B vertically. NOTE(review): wrap_plots() is from the
# patchwork package, which is not attached in the preamble shown -- confirm.
wrap_plots(Figure_2a_glmet, Figure_2b_glmet, ncol=1) #+ Figure_2c_glmet + Figure_2d_glmet
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Random forest

# Random forest (ranger): mtry and min_n are tuned, forest size fixed at 1000.
rf_spec <- rand_forest(mtry = tune(), trees = 1000, min_n = tune()) %>%
  set_mode("regression") %>%
  set_engine("ranger")

# Same preprocessing recipe, random-forest model.
rf_wf <- workflow(preprocessor = dapto_ml_rec, spec = rf_spec)

#hyperparameters
# Same stratified 10-fold CV and racing procedure as the glmnet section.
set.seed(2345)
folds <- vfold_cv(dapto_ml_train, strata = auc)

set.seed(234)
# ANOVA racing over 60 mtry/min_n candidates, minimizing RMSE (finetune pkg).
tune_rf_ft <- tune_race_anova(rf_wf,
                               folds,grid = 60,
                               metrics = metric_set(rmse),
                               control = control_race(verbose_elim = TRUE))
## ℹ Racing will minimize the rmse metric.
## ℹ Resamples are analyzed in a random order.
## ℹ Fold10: 18 eliminated; 42 candidates remain.
## 
## ℹ Fold06: 23 eliminated; 19 candidates remain.
## 
## ℹ Fold08: 2 eliminated; 17 candidates remain.
## 
## ℹ Fold01: 3 eliminated; 14 candidates remain.
## 
## ℹ Fold04: 2 eliminated; 12 candidates remain.
## 
## ℹ Fold02: 0 eliminated; 12 candidates remain.
## 
## ℹ Fold09: 1 eliminated; 11 candidates remain.
# Top 5 candidates by cross-validated RMSE; the metric is named explicitly to
# avoid tune's "no metric given" fallback message.
show_best(tune_rf_ft, metric = "rmse")
## # A tibble: 5 × 8
##    mtry min_n .metric .estimator  mean     n std_err .config              
##   <int> <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
## 1     5    16 rmse    standard    80.6    10    1.27 Preprocessor1_Model29
## 2     5     4 rmse    standard    80.7    10    1.23 Preprocessor1_Model17
## 3     6    11 rmse    standard    80.7    10    1.38 Preprocessor1_Model38
## 4     6    19 rmse    standard    80.7    10    1.44 Preprocessor1_Model18
## 5     5    14 rmse    standard    80.7    10    1.31 Preprocessor1_Model23
# Keep the single best mtry/min_n combination (lowest CV RMSE); the metric is
# named explicitly for forward compatibility with tune.
best_rmse_rf <- select_best(tune_rf_ft, metric = "rmse")

final_rf <- finalize_model(rf_spec, best_rmse_rf)
final_rf
## Random Forest Model Specification (regression)
## 
## Main Arguments:
##   mtry = 5
##   trees = 1000
##   min_n = 16
## 
## Computational engine: ranger
# Refit with ranger's permutation importance (a legitimate ranger engine
# option) to rank predictors in the vip bar chart.
final_rf %>% set_engine("ranger", importance = "permutation") %>%
  fit(auc ~ ., data = juice(dapto_ml_rec_prep)) %>%
  vip::vip(geom = "col") + theme_bw()

# Final tuned workflow: shared recipe + finalized random forest.
final_wf_rf <- workflow() %>% 
  add_recipe(dapto_ml_rec) %>% 
  add_model(final_rf)

#resample
set.seed(456)
folds_cv <- vfold_cv(dapto_ml_train, strata = auc)

#10 fold CV
# 10-fold CV of the finalized model; out-of-fold predictions are saved.
rf_rs<- fit_resamples(object = final_wf_rf, 
                       resamples = folds_cv, 
                       control = control_resamples(verbose=T, save_pred = T))
## i Fold01: preprocessor 1/1
## ✓ Fold01: preprocessor 1/1
## i Fold01: preprocessor 1/1, model 1/1
## ✓ Fold01: preprocessor 1/1, model 1/1
## i Fold01: preprocessor 1/1, model 1/1 (extracts)
## i Fold01: preprocessor 1/1, model 1/1 (predictions)
## i Fold02: preprocessor 1/1
## ✓ Fold02: preprocessor 1/1
## i Fold02: preprocessor 1/1, model 1/1
## ✓ Fold02: preprocessor 1/1, model 1/1
## i Fold02: preprocessor 1/1, model 1/1 (extracts)
## i Fold02: preprocessor 1/1, model 1/1 (predictions)
## i Fold03: preprocessor 1/1
## ✓ Fold03: preprocessor 1/1
## i Fold03: preprocessor 1/1, model 1/1
## ✓ Fold03: preprocessor 1/1, model 1/1
## i Fold03: preprocessor 1/1, model 1/1 (extracts)
## i Fold03: preprocessor 1/1, model 1/1 (predictions)
## i Fold04: preprocessor 1/1
## ✓ Fold04: preprocessor 1/1
## i Fold04: preprocessor 1/1, model 1/1
## ✓ Fold04: preprocessor 1/1, model 1/1
## i Fold04: preprocessor 1/1, model 1/1 (extracts)
## i Fold04: preprocessor 1/1, model 1/1 (predictions)
## i Fold05: preprocessor 1/1
## ✓ Fold05: preprocessor 1/1
## i Fold05: preprocessor 1/1, model 1/1
## ✓ Fold05: preprocessor 1/1, model 1/1
## i Fold05: preprocessor 1/1, model 1/1 (extracts)
## i Fold05: preprocessor 1/1, model 1/1 (predictions)
## i Fold06: preprocessor 1/1
## ✓ Fold06: preprocessor 1/1
## i Fold06: preprocessor 1/1, model 1/1
## ✓ Fold06: preprocessor 1/1, model 1/1
## i Fold06: preprocessor 1/1, model 1/1 (extracts)
## i Fold06: preprocessor 1/1, model 1/1 (predictions)
## i Fold07: preprocessor 1/1
## ✓ Fold07: preprocessor 1/1
## i Fold07: preprocessor 1/1, model 1/1
## ✓ Fold07: preprocessor 1/1, model 1/1
## i Fold07: preprocessor 1/1, model 1/1 (extracts)
## i Fold07: preprocessor 1/1, model 1/1 (predictions)
## i Fold08: preprocessor 1/1
## ✓ Fold08: preprocessor 1/1
## i Fold08: preprocessor 1/1, model 1/1
## ✓ Fold08: preprocessor 1/1, model 1/1
## i Fold08: preprocessor 1/1, model 1/1 (extracts)
## i Fold08: preprocessor 1/1, model 1/1 (predictions)
## i Fold09: preprocessor 1/1
## ✓ Fold09: preprocessor 1/1
## i Fold09: preprocessor 1/1, model 1/1
## ✓ Fold09: preprocessor 1/1, model 1/1
## i Fold09: preprocessor 1/1, model 1/1 (extracts)
## i Fold09: preprocessor 1/1, model 1/1 (predictions)
## i Fold10: preprocessor 1/1
## ✓ Fold10: preprocessor 1/1
## i Fold10: preprocessor 1/1, model 1/1
## ✓ Fold10: preprocessor 1/1, model 1/1
## i Fold10: preprocessor 1/1, model 1/1 (extracts)
## i Fold10: preprocessor 1/1, model 1/1 (predictions)
#perf resample
rf_rs %>% collect_metrics()
## # A tibble: 2 × 6
##   .metric .estimator   mean     n  std_err .config             
##   <chr>   <chr>       <dbl> <int>    <dbl> <chr>               
## 1 rmse    standard   80.3      10 2.52     Preprocessor1_Model1
## 2 rsq     standard    0.984    10 0.000986 Preprocessor1_Model1
rf_pred_obs <- rf_rs %>% collect_predictions()

# Per-prediction bias metrics, as in the glmnet section.
pred_obs_rf <- rf_rs %>% collect_predictions() %>%
  mutate (biais_brut = .pred - auc,
          bias_rel = (.pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate bias/imprecision summary (mean bias, relative RMSE, +/-20% window).
pred_obs_rf %>% 
  summarise(biais_brut = mean(biais_brut),
            biais_rel = mean(bias_rel),
            relative_rmse = sqrt(mean(bias_rel_square)),
            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
            n= n())
## # A tibble: 1 × 6
##   biais_brut biais_rel relative_rmse biais_out_20percent nb_out_20percent     n
##        <dbl>     <dbl>         <dbl>               <dbl>            <int> <int>
## 1     -0.119   0.00927        0.0827              0.0284              101  3552
# Attach covariates for plotting. NOTE(review): joining on the continuous
# `auc` can fan out rows on ties -- joining on `.row` would be safer; confirm.
Train_pred_obs_rf <- rf_pred_obs %>% 
  left_join(dapto_ml_train, by="auc") #join in order to have pred and obs AUC and SEX

Figure_2a_rf <- Train_pred_obs_rf %>%
  ggplot(mapping = aes(x = auc, y = .pred,  color= as.factor(SEX))) + 
  geom_point() +
  geom_smooth(method=lm, color="black") + 
  labs(x="Reference AUC (mg*h/L)", y= "Predicted daptomycin AUC (mg*h/L)", 
       title="(A)", color="Sex") + 
  theme_bw()
Figure_2a_rf
## `geom_smooth()` using formula = 'y ~ x'

# Residual plot over the CV predictions.
Figure_2c_rf <- rf_pred_obs %>%
  ggplot(mapping = aes(x = auc, y = auc - .pred)) +
  geom_point() + geom_smooth(method=lm) +
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(C)")+  theme_bw()
Figure_2c_rf
## `geom_smooth()` using formula = 'y ~ x'

# Fit on the full training set and persist the workflow.
fit_workflow_rf <- fit(final_wf_rf, dapto_ml_train)
saveRDS(fit_workflow_rf, file = "auc_daptomycin_rf_1_99_res0_temp_filter.rds")

#Last fit
# Fix: last_fit() has no `type` argument; the stray `type = "conf_int"` was
# swallowed by `...` (it only triggered an "unused `...`" warning), so removed.
final_res_rf <- final_wf_rf %>%
  last_fit(dapto_split)
#Performances biais imprecision test
final_res_rf %>% collect_metrics()
## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard      79.9   Preprocessor1_Model1
## 2 rsq     standard       0.984 Preprocessor1_Model1
# Test-set predictions and bias metrics, as in the glmnet section.
final_res_predictions_rf <- final_res_rf %>% collect_predictions() %>%
  rename(AUC_pred = .pred) %>%
  mutate (biais_brut = AUC_pred - auc,
          bias_rel = (AUC_pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate test-set bias/imprecision table rendered for the report.
rmarkdown::paged_table(
  as.data.frame(final_res_predictions_rf %>% 
                  summarise(biais_brut= mean(biais_brut),
                            biais_rel = mean(bias_rel), 
                            relative_rmse = sqrt(mean(bias_rel_square)),
                            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            n= n())))
# NOTE(review): join on the continuous `auc` can fan out rows on ties; confirm.
Test_pred_obs_rf <- final_res_predictions_rf %>% left_join(dapto_ml_test, by="auc")

#Prediction scatterplot
Figure_2b_rf <- Test_pred_obs_rf %>%
  ggplot(mapping = aes(x = auc, y =AUC_pred,  color=as.factor(SEX))) +
  geom_point() +
  geom_smooth(method=lm,color = "black")+
  labs(x = "Reference AUC (mg*h/L)", y = "Predicted daptomycin AUC (mg*h/L)", 
       title="(B)", color ="Sex") + 
  theme_bw()
Figure_2b_rf
## `geom_smooth()` using formula = 'y ~ x'

# Test-set residual plot.
Figure_2d_rf <- final_res_predictions_rf %>%
  ggplot(mapping = aes(x = auc, y = auc - AUC_pred)) +
  geom_point() +
  geom_smooth(method=lm)+
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(D)") + 
  theme_bw()
Figure_2d_rf
## `geom_smooth()` using formula = 'y ~ x'

# Confusion matrices at the clinical AUC thresholds (as in the glmnet
# section). The superseded mutate_if() is replaced by across(where())
# (identical behavior: every character column becomes a factor).
final_res_predictions_rf %>% 
  mutate(event_auc = if_else(auc <= 666, "inf_666", "sup_666"), 
         event_rf = if_else(AUC_pred <= 666, "inf_666", "sup_666")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_rf) 
##           Truth
## Prediction inf_666 sup_666
##    inf_666     389      16
##    sup_666      18     765
final_res_predictions_rf %>% 
  mutate(event_auc = if_else(auc <= 939, "inf_939", "sup_939"), 
         event_rf = if_else(AUC_pred <= 939, "inf_939", "sup_939")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_rf) 
##           Truth
## Prediction inf_939 sup_939
##    inf_939     627      32
##    sup_939      29     500
# AUC target between 666-939
final_res_predictions_rf %>% 
  mutate(event_auc = if_else(dplyr::between(auc, 666, 939), "auc_666_939", "auc_out"), 
         event_rf = if_else(dplyr::between(AUC_pred, 666, 939), "auc_666_939", "auc_out")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_rf)
# Stack panels A and B (wrap_plots() is from patchwork; confirm it is attached
# elsewhere, it is not in the preamble shown).
wrap_plots(Figure_2a_rf, Figure_2b_rf, ncol=1) #+ Figure_2c_rf + Figure_2d_rf
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

SVM

# Linear SVM (kernlab): cost and the epsilon-insensitive margin are tuned.
svm_spec <- svm_linear(cost = tune(), margin = tune()) %>%
  set_mode("regression") %>%
  set_engine("kernlab")

# Same preprocessing recipe, linear-SVM model.
svm_wf <- workflow(preprocessor = dapto_ml_rec, spec = svm_spec)

#hyperparameters
# Same stratified 10-fold CV and racing procedure as the other models.
set.seed(2345)
folds <- vfold_cv(dapto_ml_train, strata = auc)

set.seed(234)
# ANOVA racing over 60 cost/margin candidates, minimizing RMSE (finetune pkg).
tune_svm_ft <- tune_race_anova(svm_wf,
                               folds,grid = 60,
                               metrics = metric_set(rmse),
                               control = control_race(verbose_elim = TRUE))
## ℹ Racing will minimize the rmse metric.
## ℹ Resamples are analyzed in a random order.
## ℹ Fold10: 7 eliminated; 53 candidates remain.
## 
## ℹ Fold06: 8 eliminated; 45 candidates remain.
## 
## ℹ Fold08: 9 eliminated; 36 candidates remain.
## 
## ℹ Fold01: 13 eliminated; 23 candidates remain.
## 
## ℹ Fold04: 3 eliminated; 20 candidates remain.
## 
## ℹ Fold02: 1 eliminated; 19 candidates remain.
## 
## ℹ Fold09: 1 eliminated; 18 candidates remain.
# Top 5 candidates by cross-validated RMSE; the metric is named explicitly to
# avoid tune's "no metric given" fallback message.
show_best(tune_svm_ft, metric = "rmse")
## # A tibble: 5 × 8
##    cost margin .metric .estimator  mean     n std_err .config              
##   <dbl>  <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
## 1 4.24  0.0832 rmse    standard    88.3    10   0.999 Preprocessor1_Model55
## 2 1.08  0.0765 rmse    standard    88.3    10   0.999 Preprocessor1_Model37
## 3 0.531 0.0904 rmse    standard    88.3    10   0.996 Preprocessor1_Model40
## 4 2.74  0.0888 rmse    standard    88.3    10   0.998 Preprocessor1_Model18
## 5 0.363 0.0714 rmse    standard    88.3    10   1.01  Preprocessor1_Model29
# Keep the single best cost/margin combination (lowest CV RMSE); the metric is
# named explicitly for forward compatibility with tune.
best_rmse_svm <- select_best(tune_svm_ft, metric = "rmse")

final_svm <- finalize_model(svm_spec, best_rmse_svm)
final_svm
## Linear Support Vector Machine Model Specification (regression)
## 
## Main Arguments:
##   cost = 4.24458757692946
##   margin = 0.0832181700148309
## 
## Computational engine: kernlab
# Variable importance is left disabled: "permutation" is not a kernlab engine
# option (it was copied from the random-forest section).
# final_svm %>% set_engine("kernlab", importance = "permutation") %>%
#   fit(auc ~ ., data = juice(dapto_ml_rec_prep)) %>%
#   vip::vip(geom = "col") + theme_bw()
final_wf_svm <- workflow() %>% 
  add_recipe(dapto_ml_rec) %>% 
  add_model(final_svm)

#resample
set.seed(456)
folds_cv <- vfold_cv(dapto_ml_train, strata = auc)

#10 fold CV
# 10-fold CV of the finalized SVM; out-of-fold predictions are saved.
svm_rs<- fit_resamples(object = final_wf_svm, 
                       resamples = folds_cv, 
                       control = control_resamples(verbose=T, save_pred = T))
## i Fold01: preprocessor 1/1
## ✓ Fold01: preprocessor 1/1
## i Fold01: preprocessor 1/1, model 1/1
## ✓ Fold01: preprocessor 1/1, model 1/1
## i Fold01: preprocessor 1/1, model 1/1 (extracts)
## i Fold01: preprocessor 1/1, model 1/1 (predictions)
## i Fold02: preprocessor 1/1
## ✓ Fold02: preprocessor 1/1
## i Fold02: preprocessor 1/1, model 1/1
## ✓ Fold02: preprocessor 1/1, model 1/1
## i Fold02: preprocessor 1/1, model 1/1 (extracts)
## i Fold02: preprocessor 1/1, model 1/1 (predictions)
## i Fold03: preprocessor 1/1
## ✓ Fold03: preprocessor 1/1
## i Fold03: preprocessor 1/1, model 1/1
## ✓ Fold03: preprocessor 1/1, model 1/1
## i Fold03: preprocessor 1/1, model 1/1 (extracts)
## i Fold03: preprocessor 1/1, model 1/1 (predictions)
## i Fold04: preprocessor 1/1
## ✓ Fold04: preprocessor 1/1
## i Fold04: preprocessor 1/1, model 1/1
## ✓ Fold04: preprocessor 1/1, model 1/1
## i Fold04: preprocessor 1/1, model 1/1 (extracts)
## i Fold04: preprocessor 1/1, model 1/1 (predictions)
## i Fold05: preprocessor 1/1
## ✓ Fold05: preprocessor 1/1
## i Fold05: preprocessor 1/1, model 1/1
## ✓ Fold05: preprocessor 1/1, model 1/1
## i Fold05: preprocessor 1/1, model 1/1 (extracts)
## i Fold05: preprocessor 1/1, model 1/1 (predictions)
## i Fold06: preprocessor 1/1
## ✓ Fold06: preprocessor 1/1
## i Fold06: preprocessor 1/1, model 1/1
## ✓ Fold06: preprocessor 1/1, model 1/1
## i Fold06: preprocessor 1/1, model 1/1 (extracts)
## i Fold06: preprocessor 1/1, model 1/1 (predictions)
## i Fold07: preprocessor 1/1
## ✓ Fold07: preprocessor 1/1
## i Fold07: preprocessor 1/1, model 1/1
## ✓ Fold07: preprocessor 1/1, model 1/1
## i Fold07: preprocessor 1/1, model 1/1 (extracts)
## i Fold07: preprocessor 1/1, model 1/1 (predictions)
## i Fold08: preprocessor 1/1
## ✓ Fold08: preprocessor 1/1
## i Fold08: preprocessor 1/1, model 1/1
## ✓ Fold08: preprocessor 1/1, model 1/1
## i Fold08: preprocessor 1/1, model 1/1 (extracts)
## i Fold08: preprocessor 1/1, model 1/1 (predictions)
## i Fold09: preprocessor 1/1
## ✓ Fold09: preprocessor 1/1
## i Fold09: preprocessor 1/1, model 1/1
## ✓ Fold09: preprocessor 1/1, model 1/1
## i Fold09: preprocessor 1/1, model 1/1 (extracts)
## i Fold09: preprocessor 1/1, model 1/1 (predictions)
## i Fold10: preprocessor 1/1
## ✓ Fold10: preprocessor 1/1
## i Fold10: preprocessor 1/1, model 1/1
## ✓ Fold10: preprocessor 1/1, model 1/1
## i Fold10: preprocessor 1/1, model 1/1 (extracts)
## i Fold10: preprocessor 1/1, model 1/1 (predictions)
#perf resample (comment typo "pesvm" fixed)
svm_rs %>% collect_metrics()
## # A tibble: 2 × 6
##   .metric .estimator   mean     n  std_err .config             
##   <chr>   <chr>       <dbl> <int>    <dbl> <chr>               
## 1 rmse    standard   88.2      10 1.63     Preprocessor1_Model1
## 2 rsq     standard    0.981    10 0.000981 Preprocessor1_Model1
svm_pred_obs <- svm_rs %>% collect_predictions()

# Per-prediction bias metrics, as in the glmnet section.
pred_obs_svm <- svm_rs %>% collect_predictions() %>%
  mutate (biais_brut = .pred - auc,
          bias_rel = (.pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate bias/imprecision summary (mean bias, relative RMSE, +/-20% window).
pred_obs_svm %>% 
  summarise(biais_brut = mean(biais_brut),
            biais_rel = mean(bias_rel),
            relative_rmse = sqrt(mean(bias_rel_square)),
            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
            n= n())
## # A tibble: 1 × 6
##   biais_brut biais_rel relative_rmse biais_out_20percent nb_out_20percent     n
##        <dbl>     <dbl>         <dbl>               <dbl>            <int> <int>
## 1     -0.586    0.0122         0.109              0.0507              180  3552
# Attach covariates for plotting. NOTE(review): joining on the continuous
# `auc` can fan out rows on ties -- joining on `.row` would be safer; confirm.
Train_pred_obs_svm <- svm_pred_obs %>% 
  left_join(dapto_ml_train, by="auc") #join in order to have pred and obs AUC and SEX

Figure_2a_svm <- Train_pred_obs_svm %>%
  ggplot(mapping = aes(x = auc, y = .pred,  color= as.factor(SEX))) + 
  geom_point() +
  geom_smooth(method=lm, color="black") + 
  labs(x="Reference AUC (mg*h/L)", y= "Predicted daptomycin AUC (mg*h/L)", 
       title="(A)", color="Sex") + 
  theme_bw()
Figure_2a_svm
## `geom_smooth()` using formula = 'y ~ x'

# Residual plot over the CV predictions.
Figure_2c_svm <- svm_pred_obs %>%
  ggplot(mapping = aes(x = auc, y = auc - .pred)) +
  geom_point() + geom_smooth(method=lm) +
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(C)") +
  theme_bw()
Figure_2c_svm
## `geom_smooth()` using formula = 'y ~ x'

# Fit on the full training set and persist the workflow.
fit_workflow_svm <- fit(final_wf_svm, dapto_ml_train)
##  Setting default kernel parameters
saveRDS(fit_workflow_svm, file = "auc_daptomycin_svm_1_99_res0_temp_filter.rds")

#Last fit
# Fix: last_fit() has no `type` argument; the stray `type = "conf_int"` was
# swallowed by `...` (it only triggered an "unused `...`" warning), so removed.
final_res_svm <- final_wf_svm %>%
  last_fit(dapto_split)
#Performances biais imprecision test
final_res_svm %>% collect_metrics()
## # A tibble: 2 × 4
##   .metric .estimator .estimate .config             
##   <chr>   <chr>          <dbl> <chr>               
## 1 rmse    standard      85.9   Preprocessor1_Model1
## 2 rsq     standard       0.982 Preprocessor1_Model1
# Test-set predictions and bias metrics, as in the glmnet section.
final_res_predictions_svm <- final_res_svm %>% collect_predictions() %>%
  rename(AUC_pred = .pred) %>%
  mutate (biais_brut = AUC_pred - auc,
          bias_rel = (AUC_pred - auc)/auc,
          bias_rel_square = bias_rel * bias_rel)

# Aggregate test-set bias/imprecision table rendered for the report.
rmarkdown::paged_table(
  as.data.frame(final_res_predictions_svm %>% 
                  summarise(biais_brut= mean(biais_brut),
                            biais_rel = mean(bias_rel), 
                            relative_rmse = sqrt(mean(bias_rel_square)),
                            biais_out_20percent = mean(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            nb_out_20percent = sum(!dplyr::between(bias_rel,-0.2, 0.2)), 
                            n= n())))
# NOTE(review): join on the continuous `auc` can fan out rows on ties; confirm.
Test_pred_obs_svm <- final_res_predictions_svm %>% left_join(dapto_ml_test, by="auc")

#Prediction scatterplot
Figure_2b_svm <- Test_pred_obs_svm %>%
  ggplot(mapping = aes(x = auc, y = AUC_pred, color=as.factor(SEX))) +
  geom_point() +
  geom_smooth(method=lm,color = "black")+
  labs( x = "Reference AUC (mg*h/L)", y = "Predicted daptomycin AUC (mg*h/L)",
       title="(B)", color ="Sex") +  
  theme_bw()
Figure_2b_svm
## `geom_smooth()` using formula = 'y ~ x'

# Test-set residual plot.
Figure_2d_svm <- final_res_predictions_svm %>%
  ggplot(mapping = aes(x = auc, y = auc - AUC_pred)) +
  geom_point() +
  geom_smooth(method=lm)+
  labs(x = "Reference daptomycin AUC (mg*h/L)", y = "Reference - predicted AUC", title="(D)") +
  theme_bw()
Figure_2d_svm
## `geom_smooth()` using formula = 'y ~ x'

# Confusion matrices at the clinical AUC thresholds (as in the glmnet
# section). The superseded mutate_if() is replaced by across(where())
# (identical behavior: every character column becomes a factor).
final_res_predictions_svm %>% 
  mutate(event_auc = if_else(auc <= 666, "inf_666", "sup_666"), 
         event_svm = if_else(AUC_pred <= 666, "inf_666", "sup_666")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_svm) 
##           Truth
## Prediction inf_666 sup_666
##    inf_666     380      14
##    sup_666      27     767
final_res_predictions_svm %>% 
  mutate(event_auc = if_else(auc <= 939, "inf_939", "sup_939"), 
         event_svm = if_else(AUC_pred <= 939, "inf_939", "sup_939")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_svm) 
##           Truth
## Prediction inf_939 sup_939
##    inf_939     630      33
##    sup_939      26     499
# AUC target between 666-939
final_res_predictions_svm %>% 
  mutate(event_auc = if_else(dplyr::between(auc, 666, 939), "auc_666_939", "auc_out"), 
         event_svm = if_else(dplyr::between(AUC_pred, 666, 939), "auc_666_939", "auc_out")) %>% 
  mutate(across(where(is.character), as.factor)) %>% 
  conf_mat(truth = event_auc, estimate = event_svm) 
# Stack panels A and B (wrap_plots() is from patchwork; confirm it is attached
# elsewhere, it is not in the preamble shown).
wrap_plots(Figure_2a_svm, Figure_2b_svm, ncol=1) #+ Figure_2c_svm + Figure_2d_svm
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'